This package is based on the code of Dr. Villette and Dr. Larsen. The
package is written and maintained by Dr. Villette. This package is meant
to facilitate microbiome exploration and ensuring nice plotting.
This package covers :
The dada2 pipeline with wrapper functions that ease the processing of multiple projects
Some plotting functions for beta diversity, heatmap and differential abundance analysis using directly a phyloseq object
A pipeline for IgASeq analysis
For convenience this will not be a reproducible example, dada2 takes too long to compute and knit. This part of the tutorial will present a run that we performed in house. The rest of the tutorial will be based on reproducible data.
We use here a wrapper function that will create a list of three for pair end :
forward files
reverse files
names of the files
And for single end : -
a list of files
names of the files
f_list = list_fastq("/home/bigbeast/Documents/tmp/2022-12 CIMMAP run ELISE",
pattern = c("R1", "R2"), separator = "_", level = 1)
# check that all files are distinct
lapply(f_list, duplicated)
# check that all files exist5
lapply(f_list, file.exists)
random= lapply(f_list, "[",sample(1:255,30))
# tmp= summarise_fastq(random, cores =30, plot = F )
We will use the function qc_check. This will take time
as the plotQualityProfile isn’t parallelized in dada2. This
function will create two plots (for pair end) of n
aggregated samples and only one plot if you are using single end.
qc_check(flist, n=30)
set.seed(1)
tmp = lapply(f_list, "[",sample(1:255,30))
tmp_list= filt_list(tmp)
filtered= list()
cb= combn(x= (300-seq(0, 50, by=10)), m=2)
# Very long be careful
#try to test combination of trimmings
for(i in 1:dim(cb)[2]){
filtered[[i]]= filter_fastq(tmp, tmp_list, cutting_param = cb[ ,i], cores = 35, trimleft = 35, maxEE = c(3,4))
}
#extract the data to a df
per= NULL
for(i in 1:15) {
t = filtered[[i]] %>%
as.data.frame() %>%
mutate(per=reads.out/reads.in)
per= cbind(per, t$per) %>%
as.data.frame()
# per$names=rownames(tmp[1])
print(per)
}
colnames(per)= paste(cb[1,], cb[2,])
per$names=rownames(t[1])
# plot
png("transmic and IgA rescue mice percent reads passed depending on cutting parameters.png", width = 600, height=400)
per %>%
as.data.frame() %>%
pivot_longer(names_to = "cut", values_to = "per", cols = 1:15) %>%
ggplot(aes(y=per, x= cut))+
geom_boxplot()+ geom_line(aes(group=names, col=names))+
labs(y= "percentage passed", x= "Trimming parameters")+
theme(axis.text.x = element_text(angle=90), legend.position = "none")
dev.off()
cb= combn(x= (7-seq(0,5, by=1)), m=2)
cb = cb[nrow(cb):1,]
# Very long be careful
#try to test combination of trimmings
# registerDoParallel(cl = makeCluster(5))
for(i in 1:dim(cb)[2]){
filtered[[i]]= filter_fastq(tmp, tmp_list, cutting_param = c(260,250),
cores = 35, trimleft = 35, maxEE = cb[ ,i])
}
#extract the data to a df
per= NULL
for(i in 1:15) {
t = filtered[[i]] %>%
as.data.frame() %>%
mutate(per=reads.out/reads.in)
per= cbind(per, t$per) %>%
as.data.frame()
}
colnames(per)= paste(cb[1,], cb[2,])
per$names=rownames(t[1])
# plot
png("transmic and IgA rescue mice percent reads passed depending on errors parameters.png", width = 600, height=400)
per %>% as.data.frame() %>% pivot_longer(names_to = "cut", values_to = "per", cols = 1:15) %>%
ggplot(aes(y=per, x= cut))+
geom_boxplot()+ geom_line(aes(group=names, col=names))+
labs(y= "percentage passed", x= "Trimming parameters")+
theme(axis.text.x = element_text(angle=90), legend.position = "none")
dev.off()
We now have to remove the bad quality reads and trim the length. You will find a function to create the list of filtered files and one to make the filtered files.
filt= filt_list(f_list) # create the list of filtered files
filtered = filterAndTrim(fwd= fwd, filt = filtFs, rev=rv, filt.rev = filtRs, truncLen = c(260,240), trimLeft = 25, maxEE = c(3,5), multithread = 45)
We will use the enterotype data to explore some of
the plotting functions. Let’s start with the beta diversity functions
beta_diversity and beta_dispersion.
data(enterotype)
Alpha diversity is an important facet of microbiome analysis. I’ve created wrapper function to plot either alpha diversity as boxplots or as line plots.
alpha_diversity(enterotype, measure="Shannon", x="Enterotype", group="SeqTech", plot_type="boxplot")
alpha_diversity(enterotype, measure="Shannon", x="Enterotype", group="SeqTech", plot_type="line")
These plots are compatible with other aspects of ggplot, such as facets or stats. Stats are also implemented in this function but are very sparse.
alpha_diversity(enterotype, measure="Shannon", x="Enterotype", group="SeqTech", plot_type="boxplot", stat = T)
library(ggpubr)
enterotype %>%
subset_samples(Enterotype!="NA")%>%
alpha_diversity( measure="Shannon", x="Enterotype", group="Enterotype", plot_type="boxplot")+
stat_compare_means(comparisons = list(c("1", "2")))+
facet_grid(~SeqTech)
You will have the choice between beta_diversity and
beta_dispersion for your beta diversity plotting.
beta_dispersion will plot PCoA, NMDS, PCA, DCA, CA and
t-SNE for the moment. This function will plot the two components of your
choosing, confidence ellipses and boxplot for each axis
and for each group. Each function will return a plot and a percentage of
contribution for each component.
beta_diversity will be removed progressively from this
package. This function is redundant with
beta_dispersion.
beta_diversity(enterotype, dist="bray", method="PCoA", group="SeqTech", permanova = F)
beta_dispersion(enterotype, dist = "bray", method = "PCoA", group = "SeqTech")
There are unconstrained and constrained approaches to beta diversity. Unconstrained analysis, more common in literature, are either based on:
You can play with the parameters of each function like the following plots :
beta_dispersion(enterotype, dist = "bray", method = "PCoA", group = "SeqTech",
color_vector = c("#777711", "#117777", "#DD7788"),
legend_title = "Sequencing tech", lwd = 2,
font = 2, draw = "polygon", text = T, permanova = T,
y.intersp = 0.7)
beta_dispersion(enterotype, dist = "bray", method = "tsne", group = "SeqTech",
color_vector = c("#777711", "#117777", "#DD7788"),
legend_title = "Sequencing tech without boxplots", lwd = 2,
font = 2, draw = "polygon", text = T, permanova = T,
y.intersp = 0.7,
boxplot = F, where = "bottom")
beta_dispersion(enterotype, dist = "bray", method = "NMDS", group = "SeqTech",
color_vector = c("#777711", "#117777", "#DD7788"),
legend_title = "Sequencing tech without boxplots", lwd = 2,
font = 2, draw = "polygon", text = T, permanova = T,
y.intersp = 0.7,
boxplot = F, where = "bottom")
#> Run 0 stress 0.1380077
#> Run 1 stress 0.1567295
#> Run 2 stress 0.1524024
#> Run 3 stress 0.1547792
#> Run 4 stress 0.1580133
#> Run 5 stress 0.1571956
#> Run 6 stress 0.1587345
#> Run 7 stress 0.1601502
#> Run 8 stress 0.1507485
#> Run 9 stress 0.1581849
#> Run 10 stress 0.1552706
#> Run 11 stress 0.1508333
#> Run 12 stress 0.155805
#> Run 13 stress 0.152834
#> Run 14 stress 0.1488899
#> Run 15 stress 0.1519614
#> Run 16 stress 0.154909
#> Run 17 stress 0.1594624
#> Run 18 stress 0.1532283
#> Run 19 stress 0.1450935
#> Run 20 stress 0.1605477
#> *** Best solution was not repeated -- monoMDS stopping criteria:
#> 19: stress ratio > sratmax
#> 1: scale factor of the gradient < sfgrmin
beta_dispersion(enterotype, dist = "bray", method = "DCA", group = "SeqTech",
color_vector = c("#777711", "#117777", "#DD7788"),
legend_title = "Sequencing tech without boxplots", lwd = 2,
font = 2, draw = "polygon", text = T, permanova = T,
y.intersp = 0.7,
boxplot = F, where = "bottom")
beta_dispersion(enterotype, dist = "bray", method = "PCA", group = "SeqTech",
color_vector = c("#777711", "#117777", "#DD7788"),
legend_title = "Sequencing tech without boxplots", lwd = 2,
font = 2, draw = "polygon", text = T, permanova = T,
y.intersp = 0.7,
boxplot = F, where = "bottom")
#> 60.12674540477172.607364699821282.411668106401121.570776818932771.30799463920763
beta_dispersion(enterotype, dist = "bray", method = "CA", group = "SeqTech",
color_vector = c("#777711", "#117777", "#DD7788"),
legend_title = "Sequencing tech without boxplots", lwd = 2,
font = 2, draw = "polygon", text = T, permanova = T,
y.intersp = 0.7,
boxplot = F, where = "bottom")
Constrained analysis are multiple regression of the unconstrained
approaches. They allow to fit the ecological data or sample data to the
community data. Alternatively, you can fit other omic data to your beta
diversity analysis. We have two different classes of constrained
analysis :
rda is a regression of a PCAcca is a regression of a CAdbRDA is a RDA based on a dissimilarity matrixbeta_dispersion,
and also the result of the constrained analysis.Vegan authors have implemented a very nice feature if you want to
make your own representation instead of using mine. You can set use
scores(res, tidy=T) to return a dataframe compatible with
ggplot2.
res= constrained_beta_dispersion(enterotype,
model= "SeqTech+Gender+Nationality+Age+ClinicalStatus",
group="Enterotype", method = "CCA",
boxplot =T,
text=T,
color_vector = tol21rainbow)
res= constrained_beta_dispersion(enterotype,
model= "SeqTech+Gender+Nationality+Age+ClinicalStatus",
group="Enterotype", method = "RDA",
boxplot =T,
text=T,
color_vector = tol21rainbow)
res= constrained_beta_dispersion(enterotype,
model= "SeqTech+Gender+Nationality+Age+ClinicalStatus",
group="Enterotype", method = "dbRDA",
boxplot =T,
text=T,
color_vector = tol21rainbow)
You can then use anova statistic test to decipher which
factor or feature is significantly associated with your beta
diversity
anova(res, by="terms")
#> Permutation test for capscale under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#>
#> Model: capscale(formula = as(otu_table(reverseASV(physeq)), "matrix") ~ SeqTech + Gender + Nationality + Age + ClinicalStatus, data = df, distance = dist, na.action = na.exclude)
#> Df SumOfSqs F Pr(>F)
#> Gender 1 0.06650 0.7390 0.634
#> Nationality 5 1.06616 2.3695 0.008 **
#> Age 1 0.43786 4.8657 0.001 ***
#> ClinicalStatus 3 0.30763 1.1395 0.278
#> Residual 26 2.33975
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This function will perform a top taxa at a rank of your choosing and
create a heatmap with annotations. For now only one annotation is
supported.
The clustering is made using the hclustfunction and a
Ward.D2 method. You can define the distance matrix that you want to use.
It is important to note that the distance is made before any trimming of
the data. This means that the distance matrix is made at the ASV/OTU
level before doing the rank merging and topping, so the clustering will
represent your “true” data instead of a modified dataset. In my
personnal opinion, this is the only way of performing clustering or any
type of generalized analysis : clustering, reducing or else on the
ASV/OTU/MAG/… level and then aggregates for plotting.
If you really want to cluster on other taxonomic level you should use
tax_glom before using this function.
phylo_heatmap(enterotype, top = 30, labels = "SeqTech", taxa_rank = "Genus", factor_to_plot ="Enterotype", split = 3, distance = "bray" )
#> [1] "No phylogenetic tree in this phyloseq object, bray-curtis distance selected."
#> [1] " not reversed"
The idea behind these functions is : creating a more automatic
pipeline enabling filtering and subseting of the phyloseq
object without having to perform a temporary phyloseq
object and a temporary distance object. With these function you can
directly use the “pipe” introduced by magrittr.
So you can use a subset_samples like the following and
automatically plot your beta diversity without adding too much code.
enterotype %>%
subset_samples(SeqTech=="Illumina") %>%
beta_dispersion(group="Enterotype", color_vector = c("#777711", "#117777", "#DD7788"),
legend_title = "Enterotypes \n with bray" ,
lwd = 2, font = 2, draw = "lines", text = T, permanova = T, y.intersp = 0.7,
where="bottomleft", cex = 3)
Additionally, if you want to go further you can also serialized the
code with a for loop or a lapply or even a
parallel approach using mclapply or
doParallel.
layout(matrix(c(1,2,3),
nrow = 1,
ncol = 3,
byrow = TRUE))
for(i in c("Illumina", "Sanger", "Pyro454")){
enterotype %>%
subset_samples(SeqTech==i & !is.na(Enterotype))%>%
beta_diversity(dist="bray", method="PCoA", group="Enterotype",
color_vector =c("#777711", "#117777", "#DD7788"),
factor_to_plot = paste("Enterotypes using \n",i, "sequencing"), lwd = 2, cpoint = 2)
}
Differential abundance testing is quite complicated because of the
number of different statistical approaches existing in the literature. I
personally use ALDEx2 and SIAMCAT a lot. The first because it is usually
highly regarded and the second for the overall easiness of the package.
For now differential_abundance implement only
ALDEx2 approach.
The function will return two datasets (all taxa and one with only the significant taxa) and two plots representing the taxa significantly represented in one of the two conditions. This function only covers two by two analysis.
data("GlobalPatterns")
tmp= GlobalPatterns %>%
subset_samples(SampleType=="Feces" | SampleType=="Soil")%>%
subset_taxa(Genus!="NA")%>%
tax_glom("Genus")
taxa_names(tmp)= paste0(tax_table(tmp)[,'Genus'], 1:length(tax_table(tmp)[,"Genus"]))
res= tmp %>%
differential_abundance( group="SampleType", col1 = "brown", col2="darkgreen", plot = T)
head(res$all_features)
#> we.ep we.eBH wi.ep wi.eBH
#> Nitrosopumilus3 0.039434214 0.12845619 0.06785714 0.1679920
#> CandidatusNitrososphaera4 0.076950444 0.21021090 0.05714286 0.1535102
#> Methanocorpusculum15 0.455287324 0.57808126 0.56294643 0.6631287
#> Methanobacterium17 0.213383476 0.33807069 0.23750000 0.3557473
#> Methanobrevibacter19 0.006651174 0.04949449 0.05714286 0.1535102
#> Propionibacterium20 0.577971720 0.69575331 0.68705357 0.7798239
#> rab.all rab.win.Feces rab.win.Soil diff.btw
#> Nitrosopumilus3 0.08098792 2.0623085 -3.6693107 -5.9920693
#> CandidatusNitrososphaera4 2.82754195 1.7897405 6.8959958 5.5896675
#> Methanocorpusculum15 -0.64269861 -1.2235194 -0.1461878 0.8975267
#> Methanobacterium17 -1.63628797 0.4582759 -3.4803052 -4.1461234
#> Methanobrevibacter19 5.46751181 9.0578346 -3.5233797 -13.4920823
#> Propionibacterium20 -1.87968665 -1.3475520 -3.1861066 -1.3178377
#> diff.win effect overlap
#> Nitrosopumilus3 3.358730 -1.8060263 0.0155616731
#> CandidatusNitrososphaera4 3.433853 1.9549948 0.0003653998
#> Methanocorpusculum15 3.099406 0.2413110 0.3782386315
#> Methanobacterium17 3.687806 -0.9656066 0.1347167785
#> Methanobrevibacter19 4.880169 -2.8634015 0.0003653998
#> Propionibacterium20 5.320815 -0.1949253 0.4166668604
res$barplot
res$volcano
TBD
Last but not least : machine learning. Machine learning is used in
general to predict outcome or predict class assignation between health
and disease status. For this purpose we developed wrapper functions to
screen models more easily. Most of the code is actually generated using
caret [https://topepo.github.io/caret/] package.
For now the functions implement randomForest, glmnet and plsda models, other model might work but I didn’t test them yet.
This function accepts either
# res = screen_models(enterotype, model="glmnet", cores = 1, number = 3, repeats = 3)
You’ve performed all your sortings and sequencing, you now have three
samples coming from a single individual.
We created a pipeline analysis where you will use the
neg1 (9/10 of the neg fraction) and
neg2 (1/10 of the neg fraction) dispersion (centered
and reduced) to create a normal dispersion, using a Z approach we will
have a Z score for the pos/neg1 based on the standard
deviation of neg1/neg2.
Here we can see what the analysis will look like:
The technical dispersion is assessed using
neg1/neg2for the log2 ratio and the
neg1*neg2abundance for log10 (black dot)
The biological dispersion is assessed pos/neg1 for
the log2 ratio and the pos*neg1 abundance for log10 (orange
dot)
We will then take windows of X ASV (for example 20) to create n
Gaussian curve and n …
The pipeline is based on three main functions that will call for other
functions : seq_table, slide_z and collapse_IgAseq.
First the seq_table function will take your phyloseq object and transform it to a list of data frames, one data frame for each samples coming from a single individual. For this function you will need to give physeq, sample_name corresponding to the name identifying the individual from which the sorted samples came, sorting_names the column where we find the samples such as : “sample1_pos”, “sample1_neg1”, “sample1_neg2”. Then the cols_to_keep that need to stay for now on “all”, it will collapse your sample_data in one column to allow you get it back later on.
The function will tell you if there is some samples are alone, if you have duplicated samples you need to sort them out or the rest of the pipeline will block. You can take a look at the architecture of the new object, you will find the ASV sequence as rownames, the taxonomy collapsed with “#” separator, the three samples having their own column and the sample_data collapse using also “#” separator. The function will only take the ASV that are present in the samples.
data("igaseq")
igaseq = transform_sample_counts(igaseq, function(x)x/sum(x))
sample_names(igaseq)= sample_data(igaseq)$sample_sort
seq.tab= seq_table(igaseq, sample_name = "sample_origin", sorting_names = "sample_sort", cols_to_keep = "all" )
DT::datatable(seq.tab$MO101, rownames = F)
The colnames will have the sample_names as given in the otu_table(), check that they correspond to your pos, neg1 and neg2.
Run the main function Now we will run the main function : slide_z. This function will take you seq_table object and run the Z function for each samples. If you are running this function for the first time use plot=T, if you already made the plots let it as FALSE it will make the loop a lot faster.
In this approach we will use log2 ratio and log10 of abundances. As you know log2(0/x) or log2(x/0) can’t be performed, so we need a way to deal with the zeros. We came up with two approach :
remove all ASV with a zero value
replace 0 by a random number between 0 and the min value found in one of the three samples
replace 0 by the minimum count in each samples, this probably the worst thing to do
In sample with few ASV, i.e meconiums, I strongly recommend using the random_generation while in adults samples the no_zero approach is performing better. For more complex samples the decision is up to you, you can use the function neg_dipsersion to visualize the two outcomes.
neg_dispersion(seq.tab[[6]], positive_sorted_sample = "pos", negative_sorted_sample = "neg1", second_negative_sample = "neg2", type = "superposed")
neg_dispersion(seq.tab[[6]], positive_sorted_sample = "pos", negative_sorted_sample = "neg1", second_negative_sample = "neg2",type = "facet all three")
# run the following if you want every samples, make sure to change the number of cores
# pdf("test.pdf", width = 10, height = 7)
# mclapply(seq, neg_dispersion, mc.cores = 6, type="superposed")
# dev.off()
The first step will be to create log2 ratios and log10 abundance
(log10 abundance of pos * neg1 for
example) for each ASV between the pos and the
neg1 (log_2_ratio - log10_abundance) and between the
neg1 and neg2 (log2_neg_ratio
- log10_neg_abundance). This will be done by the function
log_ratio called by the slide_z
function.
The function will also create an ellipse of confidence interval of your
choosing, default being
confidence_interval=c(0.95,0.99, 0.999), this is done by
the function ellipse_me also called by the `slide_z`
function.
The output will be a list of S4 objects containing the following slots :
ig_seq_all : containing all the samples and all the ASV
ig_up : containing all the samples and all the ASV that significantly enriched in the IgA positive fraction
ig_down : containing all the samples and all the ASV that are significantly enriched in the IgA negative fraction
In each slot you will find the following columns :
taxonomy
sample_id
new : the sample_data collapsed
pos : positive fraction abundance
neg1 : negative (9/10) fraction abundance
neg2 : negative (1/10) fraction abundance
log10_abundance = log10(pos * neg1)
log2_ratio = log2(pos / neg1)
log10_neg_abundance = log10(neg1 * neg2)
log2_neg_ratio = log2(neg1 / neg2)
taxa = tax_table collapsed
SlideNorm = the normalized dispersion for each ASV
score = IgAseq score
ellipse_level = the level of confidence
IgA_seq=list()
system.time(for(i in names(seq.tab)){
print(i)
IgA_seq[[i]]= slide_z(seq.tab[[i]], positive_sorted_sample = "pos", negative_sorted_sample = "neg1", second_negative_sample = "neg2", deltaX = 30, slide_version = "slide_z_modern", alpha = 0.05, plot = F, zero_treatment = "random generation")
})
We now need to collapse the list of IgA_seq objects into a single
one, just use collapse_IgAseq and separate the columns that
were pasted together.
IgA_seq = collapse_IgAseq(IgA_seq)
# DT:: datatable(IgA_seq@ig_seq_all, rownames = F)
# View(IgA_seq)
IgA_all= IgA_seq@ig_seq_all %>%
separate(taxonomy, into=c("Reign","Phylum", "Order","Class","Family", "Genus","Species","ASV", "rest"), sep="#") %>%
separate(col = new, into=colnames(sample_data(igaseq)),sep= "#" )
IgA_all$alpha= ifelse(IgA_all$score>1.96, "positively significative", ifelse(IgA_all$score< -1.96, "negatively significative", "not significative"))
dim(IgA_all)
For the example we will plot the IgAseq data like Gordon’s paper. We first make a wilcoxon test to decipher which genera are significantly different from zero. Then we plot as balloon plot using ggplot.
library(rstatix)
tmp= IgA_all %>%
group_by(Genus, donor)%>%
mutate(n=n())%>%
filter(n>5 , Genus!="NA")%>%
wilcox_test(score~1, mu=0, alternative ="two.sided", detailed = T)
tmp2= IgA_all %>%
group_by(Genus, donor)%>%
mutate(n=n())%>%
filter(n>5, Genus!="NA")%>%
dplyr::select(Genus, score, donor)%>%
mutate(mean_score= median(score))%>%
left_join(tmp)
tmp2$mean_score[tmp2$mean_score <= (-5)]= (-5)
alpha= ifelse(tmp2$p<0.05, -log10(tmp2$p), 0.5)
tmp2$donor= factor(tmp2$donor, levels = c("child_delivery","child_2_months","child_24_months", "mother_24_months"))
tmp2= tmp2%>%
ungroup()%>%
select(p, score, mean_score, Genus, donor)%>%
mutate(alpha= ifelse(tmp2$p<0.05, -log10(tmp2$p), 0),
alpha2= ifelse(tmp2$mean_score<0, -alpha, alpha),
mean_score2= rescale(c(abs(mean_score)), to=c(0,5)))
tmp2 %>%
ggplot(aes(0, Genus, fill=alpha2, size=mean_score2)) +
geom_point(shape=21) +
scale_fill_gradient2(low = "olivedrab4", mid = "white", high = "brown", midpoint = 0, guide=F)+
scale_size_continuous(breaks=c(0,3,3,4,4,5,5),labels=c(-5,-4,-3,0,3,4,5),range = c(0,5))+
guides(size = guide_legend( override.aes = list(fill =colorRampPalette(c("darkgreen", "white", "brown"))(7),
size=c(5,4,3,1,3,4,5)), nrow=1,
direction = "horizontal",
title.position = "top",
label.position = "bottom",
label.hjust = 0.5,
label.vjust = 1))+
labs(fill="", x="Age", size="IgAseq median score")+
facet_grid(Phylum~donor, scales="free", space = "free", labeller = labeller(donor=fac))+
theme(axis.text.y = element_text(size=8, face="bold"),
axis.text.x=element_blank(),
axis.ticks.x = element_blank(),
strip.text.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = 'bottom',
legend.title = element_text(size=10, face="bold"),
legend.text = element_text(size=10, face="bold"),
legend.box = "vertical",
legend.box.background = element_rect(colour = "black"),
strip.background = element_blank(),
strip.text = element_text(size=12, face="bold"),
panel.grid.major.y = element_line(linetype = 2, size=.05)
)
mixOmics package